#load packages in!
library(janitor)
library(lubridate)
library(tidyverse)
library(here)
library(paletteer)
library(tsibble)
library(fable)
library(fabletools)
library(feasts)
library(forecast)
library(sf)
library(tmap)
library(mapview)
us_renew <- read_csv(here("data", "renewables_cons_prod.csv")) %>%
clean_names()
renew_clean <- us_renew %>%
mutate(description = str_to_lower(description)) %>%
filter(str_detect(description, pattern = "consumption")) %>%
filter(!str_detect(description, pattern = "total")) # ! denotes "do the opposite"
renew_date <- renew_clean %>%
mutate(yr_mo_day = lubridate::parse_date_time(yyyymm, "ym")) %>%
mutate(month_sep = yearmonth(yr_mo_day)) %>%
mutate(value = as.numeric(value)) %>%
drop_na(month_sep, value)
# make a version where I have the month & year in separate columns
renew_parsed <- renew_date %>%
mutate(month = month(yr_mo_day, label = TRUE)) %>%
mutate(year = year(yr_mo_day))
renew_gg <- ggplot(data = renew_date, aes(x = month_sep,
y = value,
group = description)) + # don't need the group here once you add an aesthetic within the geom_line() function
geom_line(aes(color = description))
renew_gg
Update colord with paletteer palettes:
renew_gg +
scale_color_paletteer_d("vapoRwave::hyperBubble")
333 Coerce our renew_parsed to a tsibble, which is a time series enabled data frame:
renew_ts <- as_tsibble(renew_parsed,
key = description,
index = month_sep)
Let’s look at our ts data ina couple different ways:
renew_ts %>% autoplot(value)
renew_ts %>% gg_subseries(value) # breaks up each description by monthly trends
#renew_ts %>% gg_season(value) # if fancier functions don't work, we can make them in ggplot()
ggplot(data = renew_parsed, aes(x = month,
y = value,
group = year)) +
geom_line(aes(color = year)) +
facet_wrap(~ description,
ncol = 1, # only want one column
scales = "free", # allows each facet_wrapped graph has their own scale
strip.position = "right")
# could knit this so that it's bigger
hydro_ts <- renew_ts %>%
filter(description == "hydroelectric power consumption")
hydro_ts %>% autoplot(value)
hydro_ts %>% gg_subseries(value)
ggplot(hydro_ts, aes(x = month, y = value, group = year)) +
geom_line(aes(color = year))
hydro_quarterly <- hydro_ts %>%
index_by(year_qu = ~(yearquarter(.))) %>%
summarize(avg_consumption = mean(value))
dcmp <- hydro_ts %>%
model(STL(value ~ season(window = 5)))
components(dcmp) %>% autoplot() # gives us the four: original, trend, seasonal, and random. LOOK AT THE SCALE. Residual is less than 10% of total range - so here it looks okay.
hist(components(dcmp)$remainder) # they look normally distributed! Which is what we want? I guess so
Now look at ACF:
hydro_ts %>%
ACF(value) %>%
autoplot() # it looks like there's some seasonality...hooray! Obdervations that are 12 months apart are more closely related than any other time
hydro_model <- hydro_ts %>%
model(
ARIMA(value),# trying to find best combination of pdqPDQ of season values
ETS(value)
) %>%
fabletools::forecast(h = "4 years")
hydro_model %>% autoplot(filter(hydro_ts, year(month_sep) > 2010)) # will only show you forecasred values unless you add the filter part on
# built in increased uncertainty as we get further and further into the future
world <- read_sf(dsn = here("data", "TM_WORLD_BORDERS_SIMPL-0.3-1"),
layer = "TM_WORLD_BORDERS_SIMPL-0.3")
mapview(world)